#delimit;
/* From this point on every command must be terminated by a semicolon. */
clear;
set more off;

/* NOTE(review): these two globals are not referenced anywhere in the
   visible part of this file - confirm whether later code relies on them. */
global temp /Sastemp;
global path ~;
/* NOTE(review): presumably only needed on old Stata releases with manual
   memory management - confirm against the Stata version in use. */
set mem 5000m;

/*================================================
 Program: water_pollution.do
 Author:  Avi Ebenstein
 Created: August 2008
 Purpose: Clean the water point data and calculate the average
          water quality at each of the 6 levels of basin aggregation
          (saved as water_pollution_levels) and by province
          (saved as water_pollution_province).
=================================================*/

*******************************************;
* Clean water point data, collapse by basin;
*******************************************;

/* Steps:
     1. Recode the Roman-numeral quality grades to numbers 1-6 for every
        pollutant in mylist, with N/A recoded to missing.
     2. Flag grade 4 or worse as bad, leaving the flag missing when the
        grade itself is missing.
     3. For each basin level 1-6, average the grades and the bad flags
        within basin, count the points, and save one file per level.
     4. Stack the six per-level files into water_pollution_levels.       */

use ~/pollution/GIS/water_points/Pollutants_Counties_watersheds.dta, clear;
drop n9 n9_1 lead_1;
global mylist "overall_q permanga_n a_n lead oils volatile_p bod mercury dissolved_";

foreach i of global mylist{;
    /* Start at 0 so any string that is neither I-VI nor N/A stays at 0.
       NOTE(review): such zeros are kept and enter the basin means below,
       while the province section drops rows with overall_q equal to 0 -
       confirm whether zeros should be treated as missing here as well.  */
    gen n`i'=0;
    local code=0;
    foreach grade in I II III IV V VI{;
        local ++code;
        replace n`i'=`code' if `i'=="`grade'";
    };
    replace n`i'=. if `i'=="N/A";
    drop `i';
    rename n`i' `i';
    /* Stata treats missing as larger than any number, so an unguarded
       comparison with 4 would flag every N/A reading as bad. The guard
       keeps the flag missing in that case, which makes the collapsed
       mean the share of bad readings among graded points only.          */
    gen bad`i'=`i'>=4 if !missing(`i');
};

drop if overall_q==.;
/* Constant 1 so that its sum within a basin is the number of points. */
gen dummy=1;

forvalues j=1/6{;
    preserve;
    /* Build the collapse lists from the full renamed variable names
       instead of relying on variable abbreviation and a wildcard.       */
    local gradevars;
    local badvars;
    foreach i of global mylist{;
        rename `i' `i'_level`j';
        rename bad`i' bad`i'_level`j';
        local gradevars `gradevars' `i'_level`j';
        local badvars `badvars' bad`i'_level`j';
    };
    collapse (mean) `gradevars' `badvars' (sum) dummy, by(level`j');
    gen level=`j';
    save ~/pollution/datafiles/water_pollution_level`j', replace;
    restore;
};

/* Stack the six per-level files into a single dataset. */
use ~/pollution/datafiles/water_pollution_level1,clear;
forvalues j=2/6{;
    append using ~/pollution/datafiles/water_pollution_level`j';
};
save ~/pollution/datafiles/water_pollution_levels, replace;


*******************************************;
* Save water quality data at province level;
*******************************************;

/* Reload the raw water point file, recode the grades exactly as in the
   basin section, then average the numeric grades by province and count
   the points per province.                                              */

/* The clear option lets this section run even when unsaved data are
   in memory, for example when it is executed on its own.                */
use ~/pollution/GIS/water_points/Pollutants_Counties_watersheds.dta, clear;
drop n9 n9_1 lead_1;
global mylist "overall_q permanga_n a_n lead oils volatile_p bod mercury dissolved_";

foreach i of global mylist{;
    /* Strings that are neither I-VI nor N/A stay at 0. */
    gen n`i'=0;
    local code=0;
    foreach grade in I II III IV V VI{;
        local ++code;
        replace n`i'=`code' if `i'=="`grade'";
    };
    replace n`i'=. if `i'=="N/A";
    drop `i';
    rename n`i' `i';
    /* Guard against missing grades, which Stata would otherwise rank
       above 4 and flag as bad.
       NOTE(review): the bad indicators are not listed in the collapse
       below, so they are discarded - confirm whether the province file
       was meant to carry them.                                          */
    gen bad`i'=`i'>=4 if !missing(`i');
};

/* Keep only points with a recognised overall grade and a province code. */
drop if overall_q==0|overall_q==.;
rename provgb provnum;
drop if provnum==.|provnum==0;
/* Constant 1 so that its sum within a province is the number of points. */
gen dummy=1;
collapse (mean) $mylist (sum) dummy, by(provnum);
save ~/pollution/datafiles/water_pollution_province, replace;
